FINAL CAPSTONE PROJECT OVERVIEW¶
This is the final capstone project for the CFA practical skills module: Python Finance Fundemantals. I've made some minor adjustments / improvements to help improve the explanations so a person without a strong coding background can interpert this.
#Import libraries
import pandas as pd
import numpy as np
import datetime as df
# Use Pandas to read stock data (the csv file is included in the course package)
stock_df = pd.read_csv('AMZN.csv')
stock_df.head(15)
| Date | Open | High | Low | Close | Adj Close | Volume | |
|---|---|---|---|---|---|---|---|
| 0 | 2023-05-30 | 122.370003 | 122.919998 | 119.860001 | 121.660004 | 121.660004 | 64314800 |
| 1 | 2023-05-31 | 121.449997 | 122.040001 | 119.169998 | 120.580002 | 120.580002 | 72800800 |
| 2 | 2023-06-01 | 120.690002 | 123.489998 | 119.930000 | 122.769997 | 122.769997 | 54375100 |
| 3 | 2023-06-02 | 124.919998 | 126.389999 | 124.019997 | 124.250000 | 124.250000 | 61215000 |
| 4 | 2023-06-05 | 123.360001 | 125.800003 | 123.029999 | 125.300003 | 125.300003 | 47950100 |
| 5 | 2023-06-06 | 125.070000 | 127.400002 | 125.000000 | 126.610001 | 126.610001 | 45695200 |
| 6 | 2023-06-07 | 127.010002 | 127.370003 | 120.629997 | 121.230003 | 121.230003 | 95663300 |
| 7 | 2023-06-08 | 123.010002 | 125.629997 | 122.260002 | 124.250000 | 124.250000 | 62159300 |
| 8 | 2023-06-09 | 124.080002 | 125.800003 | 123.190002 | 123.430000 | 123.430000 | 51330000 |
| 9 | 2023-06-12 | 124.019997 | 126.779999 | 123.529999 | 126.570000 | 126.570000 | 51338000 |
| 10 | 2023-06-13 | 128.119995 | 128.410004 | 125.180000 | 126.660004 | 126.660004 | 50564800 |
| 11 | 2023-06-14 | 126.699997 | 126.949997 | 124.120003 | 126.419998 | 126.419998 | 52422500 |
| 12 | 2023-06-15 | 125.209999 | 127.690002 | 124.320000 | 127.110001 | 127.110001 | 60458500 |
| 13 | 2023-06-16 | 127.709999 | 127.900002 | 125.300003 | 125.489998 | 125.489998 | 84188100 |
| 14 | 2023-06-20 | 124.970001 | 127.250000 | 124.500000 | 125.779999 | 125.779999 | 56930100 |
# Count the number of missing values in "stock_df" Pandas DataFrame
stock_df.isnull().sum()
Date 0 Open 0 High 0 Low 0 Close 0 Adj Close 0 Volume 0 dtype: int64
# Obtain information about the Pandas DataFrame such as data types, memory utilization..etc
stock_df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 250 entries, 0 to 249 Data columns (total 7 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Date 250 non-null object 1 Open 250 non-null float64 2 High 250 non-null float64 3 Low 250 non-null float64 4 Close 250 non-null float64 5 Adj Close 250 non-null float64 6 Volume 250 non-null int64 dtypes: float64(5), int64(1), object(1) memory usage: 13.8+ KB
# Calculate the percentage daily return
stock_df['Daily Return'] = stock_df['Adj Close'].pct_change(1) * 100
stock_df
| Date | Open | High | Low | Close | Adj Close | Volume | Daily Return | |
|---|---|---|---|---|---|---|---|---|
| 0 | 2023-05-30 | 122.370003 | 122.919998 | 119.860001 | 121.660004 | 121.660004 | 64314800 | NaN |
| 1 | 2023-05-31 | 121.449997 | 122.040001 | 119.169998 | 120.580002 | 120.580002 | 72800800 | -0.887721 |
| 2 | 2023-06-01 | 120.690002 | 123.489998 | 119.930000 | 122.769997 | 122.769997 | 54375100 | 1.816217 |
| 3 | 2023-06-02 | 124.919998 | 126.389999 | 124.019997 | 124.250000 | 124.250000 | 61215000 | 1.205509 |
| 4 | 2023-06-05 | 123.360001 | 125.800003 | 123.029999 | 125.300003 | 125.300003 | 47950100 | 0.845073 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 245 | 2024-05-20 | 184.339996 | 186.669998 | 183.279999 | 183.539993 | 183.539993 | 30511800 | -0.628048 |
| 246 | 2024-05-21 | 182.300003 | 183.259995 | 180.750000 | 183.149994 | 183.149994 | 50839100 | -0.212487 |
| 247 | 2024-05-22 | 183.880005 | 185.220001 | 181.970001 | 183.130005 | 183.130005 | 28148800 | -0.010914 |
| 248 | 2024-05-23 | 183.660004 | 184.759995 | 180.080002 | 181.050003 | 181.050003 | 33670200 | -1.135806 |
| 249 | 2024-05-24 | 181.649994 | 182.440002 | 180.300003 | 180.750000 | 180.750000 | 27434100 | -0.165702 |
250 rows Ć 8 columns
#Replace the Not a Number, NaN, with 0 instead.
stock_df['Daily Return'].replace(np.nan, 0, inplace = True)
stock_df
| Date | Open | High | Low | Close | Adj Close | Volume | Daily Return | |
|---|---|---|---|---|---|---|---|---|
| 0 | 2023-05-30 | 122.370003 | 122.919998 | 119.860001 | 121.660004 | 121.660004 | 64314800 | 0.000000 |
| 1 | 2023-05-31 | 121.449997 | 122.040001 | 119.169998 | 120.580002 | 120.580002 | 72800800 | -0.887721 |
| 2 | 2023-06-01 | 120.690002 | 123.489998 | 119.930000 | 122.769997 | 122.769997 | 54375100 | 1.816217 |
| 3 | 2023-06-02 | 124.919998 | 126.389999 | 124.019997 | 124.250000 | 124.250000 | 61215000 | 1.205509 |
| 4 | 2023-06-05 | 123.360001 | 125.800003 | 123.029999 | 125.300003 | 125.300003 | 47950100 | 0.845073 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 245 | 2024-05-20 | 184.339996 | 186.669998 | 183.279999 | 183.539993 | 183.539993 | 30511800 | -0.628048 |
| 246 | 2024-05-21 | 182.300003 | 183.259995 | 180.750000 | 183.149994 | 183.149994 | 50839100 | -0.212487 |
| 247 | 2024-05-22 | 183.880005 | 185.220001 | 181.970001 | 183.130005 | 183.130005 | 28148800 | -0.010914 |
| 248 | 2024-05-23 | 183.660004 | 184.759995 | 180.080002 | 181.050003 | 181.050003 | 33670200 | -1.135806 |
| 249 | 2024-05-24 | 181.649994 | 182.440002 | 180.300003 | 180.750000 | 180.750000 | 27434100 | -0.165702 |
250 rows Ć 8 columns
# Use the describe() method to obtain a statistical summary about the data
stock_df.describe().round(2)
| Open | High | Low | Close | Adj Close | Volume | Daily Return | |
|---|---|---|---|---|---|---|---|
| count | 250.00 | 250.00 | 250.00 | 250.00 | 250.00 | 2.500000e+02 | 250.00 |
| mean | 150.63 | 152.22 | 149.02 | 150.68 | 150.68 | 4.924186e+07 | 0.17 |
| std | 21.17 | 21.22 | 21.04 | 21.14 | 21.14 | 1.789915e+07 | 1.77 |
| min | 120.63 | 121.64 | 118.35 | 119.57 | 119.57 | 2.237840e+07 | -5.58 |
| 25% | 131.41 | 133.02 | 129.62 | 131.73 | 131.73 | 3.891888e+07 | -0.88 |
| 50% | 145.11 | 147.06 | 144.17 | 145.52 | 145.52 | 4.574090e+07 | 0.05 |
| 75% | 173.40 | 174.95 | 171.93 | 173.64 | 173.64 | 5.435895e+07 | 1.17 |
| max | 189.16 | 191.70 | 187.44 | 189.50 | 189.50 | 1.529387e+08 | 8.27 |
Over the specified time period, the average adjusted close price for Amazon stock was 150.68 dollars. The maximum adjusted close price was $189.50 The maximum volume of shares traded on one day were 152,938,700
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
# Plot a Line Plot Using Plotly Express, for the adjusted closing price of Amazon
fig = px.line(title = 'Amazon.com, Inc. (AMZN) Adjusted Closing Price [$]')
fig.add_scatter(x = stock_df['Date'], y = stock_df['Adj Close'], name = 'Adj Close')
stock_df
| Date | Open | High | Low | Close | Adj Close | Volume | Daily Return | |
|---|---|---|---|---|---|---|---|---|
| 0 | 2023-05-30 | 122.370003 | 122.919998 | 119.860001 | 121.660004 | 121.660004 | 64314800 | 0.000000 |
| 1 | 2023-05-31 | 121.449997 | 122.040001 | 119.169998 | 120.580002 | 120.580002 | 72800800 | -0.887721 |
| 2 | 2023-06-01 | 120.690002 | 123.489998 | 119.930000 | 122.769997 | 122.769997 | 54375100 | 1.816217 |
| 3 | 2023-06-02 | 124.919998 | 126.389999 | 124.019997 | 124.250000 | 124.250000 | 61215000 | 1.205509 |
| 4 | 2023-06-05 | 123.360001 | 125.800003 | 123.029999 | 125.300003 | 125.300003 | 47950100 | 0.845073 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 245 | 2024-05-20 | 184.339996 | 186.669998 | 183.279999 | 183.539993 | 183.539993 | 30511800 | -0.628048 |
| 246 | 2024-05-21 | 182.300003 | 183.259995 | 180.750000 | 183.149994 | 183.149994 | 50839100 | -0.212487 |
| 247 | 2024-05-22 | 183.880005 | 185.220001 | 181.970001 | 183.130005 | 183.130005 | 28148800 | -0.010914 |
| 248 | 2024-05-23 | 183.660004 | 184.759995 | 180.080002 | 181.050003 | 181.050003 | 33670200 | -1.135806 |
| 249 | 2024-05-24 | 181.649994 | 182.440002 | 180.300003 | 180.750000 | 180.750000 | 27434100 | -0.165702 |
250 rows Ć 8 columns
# Define a function that performs interactive data visualization using Plotly Express
def plot_financial_data(df, title):
fig = px.line(title = title)
# For loop that plots all stock prices in the pandas dataframe df
# Note that index starts with 1 because we want to skip the date column
for i in df.columns[1:]:
fig.add_scatter(x = df['Date'], y = df[i], name = i)
fig.update_traces(line_width = 5)
fig.update_layout({'plot_bgcolor': "white"})
fig.show()
# Plot High, Low, Open, Close and Adj Close
plot_financial_data(stock_df.drop(['Volume', 'Daily Return'], axis = 1), 'Amazon.com, Inc. (AMZN) Stock Price [$]')
# Plot trading volume
plot_financial_data(stock_df.iloc[:,[0,5]], 'Amazon.com, Inc. (AMZN) Trading Volume')
# Plot % Daily Returns
plot_financial_data(stock_df.iloc[:,[0,7]], 'Amazon.com, Inc. (AMZN) Percentage Daily Return [%]')
# Define a function that classifies the returns based on the magnitude
# Feel free to change these numbers
def percentage_return_classifier(percentage_return):
if percentage_return > -0.3 and percentage_return <= 0.3:
return 'Insignificant Change'
elif percentage_return > 0.3 and percentage_return <= 3:
return 'Positive Change'
elif percentage_return > -3 and percentage_return <= -0.3:
return 'Negative Change'
elif percentage_return > 3 and percentage_return <= 7:
return 'Large Positive Change'
elif percentage_return > -7 and percentage_return <= -3:
return 'Large Negative Change'
elif percentage_return > 7:
return 'Bull Run'
elif percentage_return <= -7:
return 'Bear Sell Off'
# Apply the function to the "Daily Return" Column and place the result in "Trend" column
stock_df['Trend'] = stock_df['Daily Return'].apply(percentage_return_classifier)
stock_df
| Date | Open | High | Low | Close | Adj Close | Volume | Daily Return | Trend | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 2023-05-30 | 122.370003 | 122.919998 | 119.860001 | 121.660004 | 121.660004 | 64314800 | 0.000000 | Insignificant Change |
| 1 | 2023-05-31 | 121.449997 | 122.040001 | 119.169998 | 120.580002 | 120.580002 | 72800800 | -0.887721 | Negative Change |
| 2 | 2023-06-01 | 120.690002 | 123.489998 | 119.930000 | 122.769997 | 122.769997 | 54375100 | 1.816217 | Positive Change |
| 3 | 2023-06-02 | 124.919998 | 126.389999 | 124.019997 | 124.250000 | 124.250000 | 61215000 | 1.205509 | Positive Change |
| 4 | 2023-06-05 | 123.360001 | 125.800003 | 123.029999 | 125.300003 | 125.300003 | 47950100 | 0.845073 | Positive Change |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 245 | 2024-05-20 | 184.339996 | 186.669998 | 183.279999 | 183.539993 | 183.539993 | 30511800 | -0.628048 | Negative Change |
| 246 | 2024-05-21 | 182.300003 | 183.259995 | 180.750000 | 183.149994 | 183.149994 | 50839100 | -0.212487 | Insignificant Change |
| 247 | 2024-05-22 | 183.880005 | 185.220001 | 181.970001 | 183.130005 | 183.130005 | 28148800 | -0.010914 | Insignificant Change |
| 248 | 2024-05-23 | 183.660004 | 184.759995 | 180.080002 | 181.050003 | 181.050003 | 33670200 | -1.135806 | Negative Change |
| 249 | 2024-05-24 | 181.649994 | 182.440002 | 180.300003 | 180.750000 | 180.750000 | 27434100 | -0.165702 | Insignificant Change |
250 rows Ć 9 columns
# Count distinct values in the Trend column
trend_summary = stock_df['Trend'].value_counts()
trend_summary
Trend Positive Change 98 Negative Change 87 Insignificant Change 48 Large Positive Change 8 Large Negative Change 7 Bull Run 2 Name: count, dtype: int64
# Plot a pie chart using Matplotlib Library
plt.figure(figsize = (8, 8))
trend_summary.plot(kind = 'pie', y = 'Trend');
# Let's plot a candlestick graph using Cufflinks library
# Cufflinks is a powerful Python library that connects Pandas and Plotly for generating plots using few lines of code
# Cufflinks allows for interactive data visualization
import cufflinks as cf
cf.go_offline() # Enabling offline mode for interactive data visualization locally
stock_df
| Date | Open | High | Low | Close | Adj Close | Volume | Daily Return | Trend | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 2023-05-30 | 122.370003 | 122.919998 | 119.860001 | 121.660004 | 121.660004 | 64314800 | 0.000000 | Insignificant Change |
| 1 | 2023-05-31 | 121.449997 | 122.040001 | 119.169998 | 120.580002 | 120.580002 | 72800800 | -0.887721 | Negative Change |
| 2 | 2023-06-01 | 120.690002 | 123.489998 | 119.930000 | 122.769997 | 122.769997 | 54375100 | 1.816217 | Positive Change |
| 3 | 2023-06-02 | 124.919998 | 126.389999 | 124.019997 | 124.250000 | 124.250000 | 61215000 | 1.205509 | Positive Change |
| 4 | 2023-06-05 | 123.360001 | 125.800003 | 123.029999 | 125.300003 | 125.300003 | 47950100 | 0.845073 | Positive Change |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 245 | 2024-05-20 | 184.339996 | 186.669998 | 183.279999 | 183.539993 | 183.539993 | 30511800 | -0.628048 | Negative Change |
| 246 | 2024-05-21 | 182.300003 | 183.259995 | 180.750000 | 183.149994 | 183.149994 | 50839100 | -0.212487 | Insignificant Change |
| 247 | 2024-05-22 | 183.880005 | 185.220001 | 181.970001 | 183.130005 | 183.130005 | 28148800 | -0.010914 | Insignificant Change |
| 248 | 2024-05-23 | 183.660004 | 184.759995 | 180.080002 | 181.050003 | 181.050003 | 33670200 | -1.135806 | Negative Change |
| 249 | 2024-05-24 | 181.649994 | 182.440002 | 180.300003 | 180.750000 | 180.750000 | 27434100 | -0.165702 | Insignificant Change |
250 rows Ć 9 columns
# Set the date to be the index for the Pandas DataFrame
# This is critical to show the date on the x-axis when using cufflinks
stock_df.set_index(['Date'], inplace = True)
stock_df
| Open | High | Low | Close | Adj Close | Volume | Daily Return | Trend | |
|---|---|---|---|---|---|---|---|---|
| Date | ||||||||
| 2023-05-30 | 122.370003 | 122.919998 | 119.860001 | 121.660004 | 121.660004 | 64314800 | 0.000000 | Insignificant Change |
| 2023-05-31 | 121.449997 | 122.040001 | 119.169998 | 120.580002 | 120.580002 | 72800800 | -0.887721 | Negative Change |
| 2023-06-01 | 120.690002 | 123.489998 | 119.930000 | 122.769997 | 122.769997 | 54375100 | 1.816217 | Positive Change |
| 2023-06-02 | 124.919998 | 126.389999 | 124.019997 | 124.250000 | 124.250000 | 61215000 | 1.205509 | Positive Change |
| 2023-06-05 | 123.360001 | 125.800003 | 123.029999 | 125.300003 | 125.300003 | 47950100 | 0.845073 | Positive Change |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2024-05-20 | 184.339996 | 186.669998 | 183.279999 | 183.539993 | 183.539993 | 30511800 | -0.628048 | Negative Change |
| 2024-05-21 | 182.300003 | 183.259995 | 180.750000 | 183.149994 | 183.149994 | 50839100 | -0.212487 | Insignificant Change |
| 2024-05-22 | 183.880005 | 185.220001 | 181.970001 | 183.130005 | 183.130005 | 28148800 | -0.010914 | Insignificant Change |
| 2024-05-23 | 183.660004 | 184.759995 | 180.080002 | 181.050003 | 181.050003 | 33670200 | -1.135806 | Negative Change |
| 2024-05-24 | 181.649994 | 182.440002 | 180.300003 | 180.750000 | 180.750000 | 27434100 | -0.165702 | Insignificant Change |
250 rows Ć 8 columns
# Plot Candlestick figure using Cufflinks QuantFig module
figure = cf.QuantFig(stock_df, title = 'Amazon.com, Inc. (AMZN) Candlestick Chart', name = 'AMZN')
figure.add_sma(periods =[14, 21], column = 'Close', color = ['magenta', 'green'])
figure.iplot(theme = 'white', up_color = 'green', down_color = 'red')
close_price_df = pd.read_csv('stock_data.csv')
close_price_df
| Date | Adj Close AMZN | Adj Close CAT | Adj Close DE | Adj Close EXC | Adj Close GOOGL | Adj Close JNJ | Adj Close JPM | Adj Close META | Adj Close PFE | ... | Volume AMZN | Volume CAT | Volume DE | Volume EXC | Volume GOOGL | Volume JNJ | Volume JPM | Volume META | Volume PFE | Volume PG | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 02/01/2018 | 59.450500 | 133.842728 | 142.165344 | 22.141459 | 53.466679 | 115.682427 | 88.566345 | 180.729614 | 25.982754 | ... | 53890000 | 5108400 | 1897600 | 7224506 | 31766000 | 6842100 | 13578800 | 18151900 | 17059833 | 7558900 |
| 1 | 03/01/2018 | 60.209999 | 134.047287 | 142.813110 | 21.845049 | 54.378876 | 116.787498 | 88.656593 | 183.967255 | 26.175268 | ... | 62176000 | 4146100 | 1392300 | 6750350 | 31318000 | 5350500 | 11901000 | 16886600 | 14183151 | 5863600 |
| 2 | 04/01/2018 | 60.479500 | 135.888229 | 143.631912 | 21.537451 | 54.590103 | 116.779190 | 89.926659 | 183.628555 | 26.232313 | ... | 60442000 | 4865800 | 1747000 | 6630619 | 26052000 | 4837600 | 12953700 | 13880900 | 13046517 | 6322500 |
| 3 | 05/01/2018 | 61.457001 | 138.035919 | 144.243759 | 21.358482 | 55.313984 | 117.743027 | 89.349350 | 186.138962 | 26.282223 | ... | 70894000 | 3945400 | 1574400 | 10013224 | 30250000 | 6158800 | 14155000 | 13574500 | 13167517 | 5949700 |
| 4 | 08/01/2018 | 62.343498 | 141.504761 | 144.954468 | 21.587782 | 55.509274 | 117.892593 | 89.481308 | 187.563522 | 25.989882 | ... | 85590000 | 5929200 | 2711800 | 15116925 | 24644000 | 5129800 | 12466500 | 17994700 | 19964341 | 4860700 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1759 | 30/12/2024 | 221.300003 | 363.010010 | 423.020019 | 37.630001 | 191.240005 | 143.339996 | 239.320007 | 591.239990 | 26.420000 | ... | 28321200 | 1422200 | 792500 | 4145100 | 14264700 | 6268700 | 5723800 | 7025900 | 32859100 | 4354500 |
| 1760 | 31/12/2024 | 219.389999 | 362.760010 | 423.700012 | 37.639999 | 189.300003 | 144.619995 | 239.710007 | 585.510010 | 26.530001 | ... | 24819700 | 1168100 | 772700 | 5347900 | 17466900 | 5811400 | 4871000 | 6019500 | 27085500 | 3957600 |
| 1761 | 02/01/2025 | 220.220001 | 359.769989 | 418.179993 | 37.660000 | 189.429993 | 144.020004 | 240.000000 | 599.239990 | 26.610001 | ... | 33956600 | 1802600 | 1005300 | 4578400 | 20370800 | 6051300 | 9220900 | 12682300 | 32899200 | 5401700 |
| 1762 | 03/01/2025 | 224.190002 | 363.790008 | 422.220001 | 38.049999 | 191.789993 | 144.190002 | 243.279999 | 604.630005 | 26.590000 | ... | 27485000 | 1390800 | 875100 | 4268100 | 18582000 | 5878800 | 9460700 | 11418600 | 33403100 | 5256600 |
| 1763 | 06/01/2025 | 228.149994 | 366.559998 | 423.644989 | 37.735001 | 197.110001 | 143.679993 | 242.264999 | 617.349976 | 26.850000 | ... | 11201274 | 413486 | 201880 | 886842 | 9118320 | 1716732 | 2527399 | 3611517 | 9345052 | 1788640 |
1764 rows Ć 61 columns
# Remove the "Date" column (first column) and the last 10 columns (Volume columns)
adjusted_close_df = close_price_df.iloc[:, 1:-10]
# Calculate the percentage daily return for the remaining columns
daily_returns_df = adjusted_close_df.pct_change() * 100
# Replace NaN values with 0
daily_returns_df.replace(np.nan, 0, inplace=True)
# Display the daily returns DataFrame
daily_returns_df
daily_returns_subset = daily_returns_df.iloc[:, :10]
daily_returns_subset
| Adj Close AMZN | Adj Close CAT | Adj Close DE | Adj Close EXC | Adj Close GOOGL | Adj Close JNJ | Adj Close JPM | Adj Close META | Adj Close PFE | Adj Close PG | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 1 | 1.277531 | 0.152836 | 0.455643 | -1.338709 | 1.706104 | 0.955264 | 0.101899 | 1.791428 | 0.740932 | -0.121353 |
| 2 | 0.447601 | 1.373353 | 0.573338 | -1.408091 | 0.388437 | -0.007114 | 1.432567 | -0.184108 | 0.217935 | 0.706883 |
| 3 | 1.616252 | 1.580483 | 0.425983 | -0.830964 | 1.326029 | 0.825350 | -0.641977 | 1.367111 | 0.190260 | 0.065795 |
| 4 | 1.442468 | 2.512999 | 0.492714 | 1.073576 | 0.353056 | 0.127028 | 0.147688 | 0.765321 | -1.112315 | 0.526085 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1759 | -1.094971 | -0.507037 | -1.260282 | 0.534333 | -0.788540 | -1.178908 | -0.767090 | -1.428787 | -0.751318 | -1.439275 |
| 1760 | -0.863083 | -0.068869 | 0.160747 | 0.026570 | -1.014433 | 0.892981 | 0.162961 | -0.969146 | 0.416354 | 0.335147 |
| 1761 | 0.378323 | -0.824242 | -1.302813 | 0.053136 | 0.068669 | -0.414874 | 0.120977 | 2.344961 | 0.301545 | -0.996122 |
| 1762 | 1.802743 | 1.117386 | 0.966093 | 1.035580 | 1.245843 | 0.118038 | 1.366666 | 0.899475 | -0.075161 | -0.512104 |
| 1763 | 1.766355 | 0.761425 | 0.337499 | -0.827854 | 2.773871 | -0.353707 | -0.417214 | 2.103761 | 0.977812 | -2.034761 |
1764 rows Ć 10 columns
# Insert the date column at the start of the Pandas DataFrame (@ index = 0)
daily_returns_df.insert(0, "Date", close_price_df['Date'])
daily_returns_df
daily_returns_subset.insert(0, "Date", close_price_df['Date'])
daily_returns_subset
| Date | Adj Close AMZN | Adj Close CAT | Adj Close DE | Adj Close EXC | Adj Close GOOGL | Adj Close JNJ | Adj Close JPM | Adj Close META | Adj Close PFE | Adj Close PG | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 02/01/2018 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 1 | 03/01/2018 | 1.277531 | 0.152836 | 0.455643 | -1.338709 | 1.706104 | 0.955264 | 0.101899 | 1.791428 | 0.740932 | -0.121353 |
| 2 | 04/01/2018 | 0.447601 | 1.373353 | 0.573338 | -1.408091 | 0.388437 | -0.007114 | 1.432567 | -0.184108 | 0.217935 | 0.706883 |
| 3 | 05/01/2018 | 1.616252 | 1.580483 | 0.425983 | -0.830964 | 1.326029 | 0.825350 | -0.641977 | 1.367111 | 0.190260 | 0.065795 |
| 4 | 08/01/2018 | 1.442468 | 2.512999 | 0.492714 | 1.073576 | 0.353056 | 0.127028 | 0.147688 | 0.765321 | -1.112315 | 0.526085 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1759 | 30/12/2024 | -1.094971 | -0.507037 | -1.260282 | 0.534333 | -0.788540 | -1.178908 | -0.767090 | -1.428787 | -0.751318 | -1.439275 |
| 1760 | 31/12/2024 | -0.863083 | -0.068869 | 0.160747 | 0.026570 | -1.014433 | 0.892981 | 0.162961 | -0.969146 | 0.416354 | 0.335147 |
| 1761 | 02/01/2025 | 0.378323 | -0.824242 | -1.302813 | 0.053136 | 0.068669 | -0.414874 | 0.120977 | 2.344961 | 0.301545 | -0.996122 |
| 1762 | 03/01/2025 | 1.802743 | 1.117386 | 0.966093 | 1.035580 | 1.245843 | 0.118038 | 1.366666 | 0.899475 | -0.075161 | -0.512104 |
| 1763 | 06/01/2025 | 1.766355 | 0.761425 | 0.337499 | -0.827854 | 2.773871 | -0.353707 | -0.417214 | 2.103761 | 0.977812 | -2.034761 |
1764 rows Ć 11 columns
# Plot the stocks daily returns
plot_financial_data(daily_returns_subset, 'Percentage Daily Returns [%]')
# Plot a heatmap showing the correlations between daily returns
# Strong positive correlations between Catterpillar and John Deere - both into heavy equipment and machinery
# META and Google - both into Tech and Cloud Computing
plt.figure(figsize = (10, 8))
sns.heatmap(daily_returns_subset.drop(columns = ['Date']).corr(), annot = True);
# Plot the Pairplot between stocks daily returns
sns.pairplot(daily_returns_subset);
# Function to scale stock prices based on their initial starting price
# The objective of this function is to set all prices to start at a value of 1
def price_scaling(raw_prices_df):
scaled_prices_df = raw_prices_df.copy()
for i in raw_prices_df.columns[1:]:
scaled_prices_df[i] = raw_prices_df[i]/raw_prices_df[i][0]
return scaled_prices_df
price_scaling(close_price_df)
| Date | Adj Close AMZN | Adj Close CAT | Adj Close DE | Adj Close EXC | Adj Close GOOGL | Adj Close JNJ | Adj Close JPM | Adj Close META | Adj Close PFE | ... | Volume AMZN | Volume CAT | Volume DE | Volume EXC | Volume GOOGL | Volume JNJ | Volume JPM | Volume META | Volume PFE | Volume PG | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 02/01/2018 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
| 1 | 03/01/2018 | 1.012775 | 1.001528 | 1.004556 | 0.986613 | 1.017061 | 1.009553 | 1.001019 | 1.017914 | 1.007409 | ... | 1.153758 | 0.811624 | 0.733716 | 0.934368 | 0.985897 | 0.781997 | 0.876440 | 0.930294 | 0.831377 | 0.775721 |
| 2 | 04/01/2018 | 1.017309 | 1.015283 | 1.010316 | 0.972721 | 1.021012 | 1.009481 | 1.015359 | 1.016040 | 1.009605 | ... | 1.121581 | 0.952510 | 0.920637 | 0.917795 | 0.820122 | 0.707034 | 0.953965 | 0.764708 | 0.764751 | 0.836431 |
| 3 | 05/01/2018 | 1.033751 | 1.031329 | 1.014620 | 0.964638 | 1.034551 | 1.017813 | 1.008841 | 1.029931 | 1.011526 | ... | 1.315532 | 0.772336 | 0.829680 | 1.386008 | 0.952276 | 0.900133 | 1.042434 | 0.747828 | 0.771843 | 0.787112 |
| 4 | 08/01/2018 | 1.048662 | 1.057247 | 1.019619 | 0.974994 | 1.038203 | 1.019105 | 1.010331 | 1.037813 | 1.000274 | ... | 1.588235 | 1.160677 | 1.429068 | 2.092451 | 0.775798 | 0.749741 | 0.918086 | 0.991340 | 1.170254 | 0.643043 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1759 | 30/12/2024 | 3.722425 | 2.712213 | 2.975550 | 1.699527 | 3.576807 | 1.239082 | 2.702155 | 3.271406 | 1.016828 | ... | 0.525537 | 0.278404 | 0.417633 | 0.573755 | 0.449056 | 0.916195 | 0.421525 | 0.387061 | 1.926109 | 0.576076 |
| 1760 | 31/12/2024 | 3.690297 | 2.710345 | 2.980333 | 1.699978 | 3.540523 | 1.250147 | 2.706559 | 3.239702 | 1.021062 | ... | 0.460562 | 0.228663 | 0.407199 | 0.740244 | 0.549861 | 0.849359 | 0.358721 | 0.331618 | 1.587677 | 0.523568 |
| 1761 | 02/01/2025 | 3.704258 | 2.688006 | 2.941504 | 1.700882 | 3.542954 | 1.244960 | 2.709833 | 3.315671 | 1.024141 | ... | 0.630109 | 0.352870 | 0.529774 | 0.633732 | 0.641277 | 0.884421 | 0.679066 | 0.698676 | 1.928460 | 0.714615 |
| 1762 | 03/01/2025 | 3.771036 | 2.718041 | 2.969922 | 1.718496 | 3.587094 | 1.246430 | 2.746867 | 3.345495 | 1.023371 | ... | 0.510020 | 0.272257 | 0.461161 | 0.590781 | 0.584965 | 0.859210 | 0.696726 | 0.629058 | 1.957997 | 0.695419 |
| 1763 | 06/01/2025 | 3.837646 | 2.738737 | 2.979946 | 1.704269 | 3.686595 | 1.242021 | 2.735407 | 3.415876 | 1.033378 | ... | 0.207854 | 0.080942 | 0.106387 | 0.122755 | 0.287047 | 0.250907 | 0.186128 | 0.198961 | 0.547781 | 0.236627 |
1764 rows Ć 61 columns
import random
def generate_portfolio_weights(n):
weights = []
for i in range(n):
weights.append(random.random())
# let's make the sum of all weights add up to 1
weights = weights/np.sum(weights)
return weights
# Call the function (Run this cell multiple times to generate different outputs)
weights = generate_portfolio_weights(10)
print(weights)
[0.12283562 0.11085941 0.07078284 0.00170883 0.13510652 0.1370366 0.13327715 0.1258203 0.08218237 0.08039037]
# Let's display "close_price_df" Pandas DataFrame
close_price_df
# Select the "Date" column and the next 10 columns
columns_to_keep = close_price_df.columns[:11] # First column (Date) + next 10 columns
# Create a new DataFrame with only the selected columns
close_price_df_subset = close_price_df[columns_to_keep]
# Display the resulting DataFrame
close_price_df_subset
| Date | Adj Close AMZN | Adj Close CAT | Adj Close DE | Adj Close EXC | Adj Close GOOGL | Adj Close JNJ | Adj Close JPM | Adj Close META | Adj Close PFE | Adj Close PG | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 02/01/2018 | 59.450500 | 133.842728 | 142.165344 | 22.141459 | 53.466679 | 115.682427 | 88.566345 | 180.729614 | 25.982754 | 75.279755 |
| 1 | 03/01/2018 | 60.209999 | 134.047287 | 142.813110 | 21.845049 | 54.378876 | 116.787498 | 88.656593 | 183.967255 | 26.175268 | 75.188400 |
| 2 | 04/01/2018 | 60.479500 | 135.888229 | 143.631912 | 21.537451 | 54.590103 | 116.779190 | 89.926659 | 183.628555 | 26.232313 | 75.719894 |
| 3 | 05/01/2018 | 61.457001 | 138.035919 | 144.243759 | 21.358482 | 55.313984 | 117.743027 | 89.349350 | 186.138962 | 26.282223 | 75.769714 |
| 4 | 08/01/2018 | 62.343498 | 141.504761 | 144.954468 | 21.587782 | 55.509274 | 117.892593 | 89.481308 | 187.563522 | 25.989882 | 76.168327 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1759 | 30/12/2024 | 221.300003 | 363.010010 | 423.020019 | 37.630001 | 191.240005 | 143.339996 | 239.320007 | 591.239990 | 26.420000 | 167.089996 |
| 1760 | 31/12/2024 | 219.389999 | 362.760010 | 423.700012 | 37.639999 | 189.300003 | 144.619995 | 239.710007 | 585.510010 | 26.530001 | 167.649994 |
| 1761 | 02/01/2025 | 220.220001 | 359.769989 | 418.179993 | 37.660000 | 189.429993 | 144.020004 | 240.000000 | 599.239990 | 26.610001 | 165.979996 |
| 1762 | 03/01/2025 | 224.190002 | 363.790008 | 422.220001 | 38.049999 | 191.789993 | 144.190002 | 243.279999 | 604.630005 | 26.590000 | 165.130005 |
| 1763 | 06/01/2025 | 228.149994 | 366.559998 | 423.644989 | 37.735001 | 197.110001 | 143.679993 | 242.264999 | 617.349976 | 26.850000 | 161.770004 |
1764 rows Ć 11 columns
# Scale stock prices using the "price_scaling" function that we defined earlier (make all stock values start at 1)
portfolio_df = close_price_df_subset.copy()
scaled_df = price_scaling(portfolio_df)
scaled_df
| Date | Adj Close AMZN | Adj Close CAT | Adj Close DE | Adj Close EXC | Adj Close GOOGL | Adj Close JNJ | Adj Close JPM | Adj Close META | Adj Close PFE | Adj Close PG | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 02/01/2018 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
| 1 | 03/01/2018 | 1.012775 | 1.001528 | 1.004556 | 0.986613 | 1.017061 | 1.009553 | 1.001019 | 1.017914 | 1.007409 | 0.998786 |
| 2 | 04/01/2018 | 1.017309 | 1.015283 | 1.010316 | 0.972721 | 1.021012 | 1.009481 | 1.015359 | 1.016040 | 1.009605 | 1.005847 |
| 3 | 05/01/2018 | 1.033751 | 1.031329 | 1.014620 | 0.964638 | 1.034551 | 1.017813 | 1.008841 | 1.029931 | 1.011526 | 1.006509 |
| 4 | 08/01/2018 | 1.048662 | 1.057247 | 1.019619 | 0.974994 | 1.038203 | 1.019105 | 1.010331 | 1.037813 | 1.000274 | 1.011804 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1759 | 30/12/2024 | 3.722425 | 2.712213 | 2.975550 | 1.699527 | 3.576807 | 1.239082 | 2.702155 | 3.271406 | 1.016828 | 2.219587 |
| 1760 | 31/12/2024 | 3.690297 | 2.710345 | 2.980333 | 1.699978 | 3.540523 | 1.250147 | 2.706559 | 3.239702 | 1.021062 | 2.227026 |
| 1761 | 02/01/2025 | 3.704258 | 2.688006 | 2.941504 | 1.700882 | 3.542954 | 1.244960 | 2.709833 | 3.315671 | 1.024141 | 2.204842 |
| 1762 | 03/01/2025 | 3.771036 | 2.718041 | 2.969922 | 1.718496 | 3.587094 | 1.246430 | 2.746867 | 3.345495 | 1.023371 | 2.193551 |
| 1763 | 06/01/2025 | 3.837646 | 2.738737 | 2.979946 | 1.704269 | 3.686595 | 1.242021 | 2.735407 | 3.415876 | 1.033378 | 2.148918 |
1764 rows Ć 11 columns
portfolio_df
| Date | Adj Close AMZN | Adj Close CAT | Adj Close DE | Adj Close EXC | Adj Close GOOGL | Adj Close JNJ | Adj Close JPM | Adj Close META | Adj Close PFE | Adj Close PG | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 02/01/2018 | 59.450500 | 133.842728 | 142.165344 | 22.141459 | 53.466679 | 115.682427 | 88.566345 | 180.729614 | 25.982754 | 75.279755 |
| 1 | 03/01/2018 | 60.209999 | 134.047287 | 142.813110 | 21.845049 | 54.378876 | 116.787498 | 88.656593 | 183.967255 | 26.175268 | 75.188400 |
| 2 | 04/01/2018 | 60.479500 | 135.888229 | 143.631912 | 21.537451 | 54.590103 | 116.779190 | 89.926659 | 183.628555 | 26.232313 | 75.719894 |
| 3 | 05/01/2018 | 61.457001 | 138.035919 | 144.243759 | 21.358482 | 55.313984 | 117.743027 | 89.349350 | 186.138962 | 26.282223 | 75.769714 |
| 4 | 08/01/2018 | 62.343498 | 141.504761 | 144.954468 | 21.587782 | 55.509274 | 117.892593 | 89.481308 | 187.563522 | 25.989882 | 76.168327 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1759 | 30/12/2024 | 221.300003 | 363.010010 | 423.020019 | 37.630001 | 191.240005 | 143.339996 | 239.320007 | 591.239990 | 26.420000 | 167.089996 |
| 1760 | 31/12/2024 | 219.389999 | 362.760010 | 423.700012 | 37.639999 | 189.300003 | 144.619995 | 239.710007 | 585.510010 | 26.530001 | 167.649994 |
| 1761 | 02/01/2025 | 220.220001 | 359.769989 | 418.179993 | 37.660000 | 189.429993 | 144.020004 | 240.000000 | 599.239990 | 26.610001 | 165.979996 |
| 1762 | 03/01/2025 | 224.190002 | 363.790008 | 422.220001 | 38.049999 | 191.789993 | 144.190002 | 243.279999 | 604.630005 | 26.590000 | 165.130005 |
| 1763 | 06/01/2025 | 228.149994 | 366.559998 | 423.644989 | 37.735001 | 197.110001 | 143.679993 | 242.264999 | 617.349976 | 26.850000 | 161.770004 |
1764 rows Ć 11 columns
# Use enumerate() method to obtain the stock names along with a counter "i" (0, 1, 2, 3,..etc.)
# This counter "i" will be used as an index to access elements in the "weights" list
initial_investment = 1000000
for i, stock in enumerate(scaled_df.columns[1:]):
portfolio_df[stock] = weights[i] * scaled_df[stock] * initial_investment
portfolio_df.round(1)
| Date | Adj Close AMZN | Adj Close CAT | Adj Close DE | Adj Close EXC | Adj Close GOOGL | Adj Close JNJ | Adj Close JPM | Adj Close META | Adj Close PFE | Adj Close PG | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 02/01/2018 | 122835.6 | 110859.4 | 70782.8 | 1708.8 | 135106.5 | 137036.6 | 133277.1 | 125820.3 | 82182.4 | 80390.4 |
| 1 | 03/01/2018 | 124404.9 | 111028.8 | 71105.4 | 1686.0 | 137411.6 | 138345.7 | 133413.0 | 128074.3 | 82791.3 | 80292.8 |
| 2 | 04/01/2018 | 124961.7 | 112553.7 | 71513.0 | 1662.2 | 137945.3 | 138335.8 | 135324.2 | 127838.5 | 82971.7 | 80860.4 |
| 3 | 05/01/2018 | 126981.4 | 114332.6 | 71817.7 | 1648.4 | 139774.5 | 139477.6 | 134455.4 | 129586.2 | 83129.6 | 80913.6 |
| 4 | 08/01/2018 | 128813.1 | 117205.7 | 72171.5 | 1666.1 | 140268.0 | 139654.7 | 134654.0 | 130577.9 | 82204.9 | 81339.3 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1759 | 30/12/2024 | 457246.3 | 300674.4 | 210617.8 | 2904.2 | 483250.0 | 169799.6 | 360135.5 | 411609.3 | 83565.4 | 178433.4 |
| 1760 | 31/12/2024 | 453299.9 | 300467.3 | 210956.4 | 2905.0 | 478347.7 | 171315.8 | 360722.4 | 407620.2 | 83913.3 | 179031.5 |
| 1761 | 02/01/2025 | 455014.8 | 297990.7 | 208208.0 | 2906.5 | 478676.2 | 170605.1 | 361158.8 | 417178.7 | 84166.3 | 177248.1 |
| 1762 | 03/01/2025 | 463217.6 | 301320.4 | 210219.5 | 2936.6 | 484639.8 | 170806.5 | 366094.6 | 420931.2 | 84103.1 | 176340.4 |
| 1763 | 06/01/2025 | 471399.7 | 303614.7 | 210929.0 | 2912.3 | 498083.0 | 170202.3 | 364567.2 | 429786.5 | 84925.4 | 172752.3 |
1764 rows Ć 11 columns
def asset_allocation(df, weights, initial_investment):
portfolio_df = df.copy()
# Scale stock prices using the "price_scaling" function that we defined earlier (Make them all start at 1)
scaled_df = price_scaling(df)
for i, stock in enumerate(scaled_df.columns[1:]):
portfolio_df[stock] = scaled_df[stock] * weights[i] * initial_investment
# Sum up all values and place the result in a new column titled "portfolio value [$]"
# Note that we excluded the date column from this calculation
portfolio_df['Portfolio Value [$]'] = portfolio_df[portfolio_df != 'Date'].sum(axis = 1, numeric_only = True)
# Calculate the portfolio percentage daily return and replace NaNs with zeros
portfolio_df['Portfolio Daily Return [%]'] = portfolio_df['Portfolio Value [$]'].pct_change(1) * 100
portfolio_df.replace(np.nan, 0, inplace = True)
return portfolio_df
n = len(close_price_df_subset.columns)-1
# Let's generate random weights
print('Number of stocks under consideration = {}'.format(n))
weights = generate_portfolio_weights(n).round(6)
print('Portfolio weights = {}'.format(weights))
Number of stocks under consideration = 10 Portfolio weights = [0.086754 0.024816 0.12297 0.085382 0.128072 0.123358 0.165131 0.131114 0.117482 0.01492 ]
# Let's test out the "asset_allocation" function
portfolio_df = asset_allocation(close_price_df_subset, weights, 1000000)
portfolio_df.round(2)
| Date | Adj Close AMZN | Adj Close CAT | Adj Close DE | Adj Close EXC | Adj Close GOOGL | Adj Close JNJ | Adj Close JPM | Adj Close META | Adj Close PFE | Adj Close PG | Portfolio Value [$] | Portfolio Daily Return [%] | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 02/01/2018 | 86754.00 | 24816.00 | 122970.00 | 85382.00 | 128072.00 | 123358.00 | 165131.00 | 131114.00 | 117482.00 | 14920.00 | 999999.00 | 0.00 |
| 1 | 03/01/2018 | 87862.31 | 24853.93 | 123530.30 | 84238.98 | 130257.04 | 124536.39 | 165299.27 | 133462.81 | 118352.46 | 14901.89 | 1007295.39 | 0.73 |
| 2 | 04/01/2018 | 88255.58 | 25195.26 | 124238.55 | 83052.82 | 130763.01 | 124527.53 | 167667.29 | 133217.10 | 118610.39 | 15007.23 | 1010534.77 | 0.32 |
| 3 | 05/01/2018 | 89682.01 | 25593.47 | 124767.78 | 82362.68 | 132496.96 | 125555.32 | 166590.90 | 135038.32 | 118836.06 | 15017.11 | 1015940.63 | 0.53 |
| 4 | 08/01/2018 | 90975.65 | 26236.63 | 125382.53 | 83246.91 | 132964.75 | 125714.81 | 166836.94 | 136071.80 | 117514.23 | 15096.11 | 1020040.36 | 0.40 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1759 | 30/12/2024 | 322935.22 | 67306.28 | 365903.32 | 145108.99 | 458088.86 | 152850.66 | 446209.58 | 428927.16 | 119459.03 | 33116.24 | 2539905.35 | -0.95 |
| 1760 | 31/12/2024 | 320148.02 | 67259.93 | 366491.50 | 145147.55 | 453441.86 | 154215.59 | 446936.73 | 424770.23 | 119956.40 | 33227.23 | 2531595.04 | -0.33 |
| 1761 | 02/01/2025 | 321359.21 | 66705.54 | 361716.80 | 145224.67 | 453753.23 | 153575.79 | 447477.42 | 434730.92 | 120318.12 | 32896.25 | 2537757.96 | 0.24 |
| 1762 | 03/01/2025 | 327152.49 | 67450.90 | 365211.32 | 146728.59 | 459406.28 | 153757.06 | 453592.95 | 438641.22 | 120227.69 | 32727.79 | 2564896.30 | 1.07 |
| 1763 | 06/01/2025 | 332931.17 | 67964.49 | 366443.91 | 145513.89 | 472149.62 | 153213.22 | 451700.49 | 447869.18 | 121403.29 | 32061.85 | 2591251.11 | 1.03 |
1764 rows Ć 13 columns
plot_financial_data(portfolio_df[['Date', 'Portfolio Daily Return [%]']], 'Portfolio Percentage Daily Return [%]')
# Plot each stock position in our portfolio over time
# This graph shows how our initial investment in each individual stock grows over time
plot_financial_data(portfolio_df.drop(['Portfolio Value [$]', 'Portfolio Daily Return [%]'], axis = 1), 'Portfolio positions [$]')
# Plot the total daily value of the portfolio (sum of all positions)
plot_financial_data(portfolio_df[['Date', 'Portfolio Value [$]']], 'Total Portfolio Value [$]')
# Let's define the simulation engine function
# The function receives:
# (1) portfolio weights
# (2) initial investment amount
# The function performs asset allocation and calculates portfolio statistical metrics including Sharpe ratio
# The function returns:
# (1) Expected portfolio return
# (2) Expected volatility
# (3) Sharpe ratio
# (4) Return on investment
# (5) Final portfolio value in dollars
def simulation_engine(weights, initial_investment):
# Perform asset allocation using the random weights (sent as arguments to the function)
portfolio_df = asset_allocation(close_price_df_subset, weights, initial_investment)
# Calculate the return on the investment
# Return on investment is calculated using the last final value of the portfolio compared to its initial value
return_on_investment = ((portfolio_df['Portfolio Value [$]'][-1:] -
portfolio_df['Portfolio Value [$]'][0])/
portfolio_df['Portfolio Value [$]'][0]) * 100
# Daily change of every stock in the portfolio (Note that we dropped the date, portfolio daily worth and daily % returns)
portfolio_daily_return_df = portfolio_df.drop(columns = ['Date', 'Portfolio Value [$]', 'Portfolio Daily Return [%]'])
portfolio_daily_return_df = portfolio_daily_return_df.pct_change(1)
# Portfolio Expected Return formula
expected_portfolio_return = np.sum(weights * portfolio_daily_return_df.mean() ) * 252
# Portfolio volatility (risk) formula
# The risk of an asset is measured using the standard deviation which indicates the dispertion away from the mean
# The risk of a portfolio is not a simple sum of the risks of the individual assets within the portfolio
# Portfolio risk must consider correlations between assets within the portfolio which is indicated by the covariance
# The covariance determines the relationship between the movements of two random variables
# When two stocks move together, they have a positive covariance when they move inversely, the have a negative covariance
covariance = portfolio_daily_return_df.cov() * 252
expected_volatility = np.sqrt(np.dot(weights.T, np.dot(covariance, weights)))
rf = 0.03
# Calculate Sharpe ratio
sharpe_ratio = (expected_portfolio_return - rf)/expected_volatility
return expected_portfolio_return, expected_volatility, sharpe_ratio, portfolio_df['Portfolio Value [$]'][-1:].values[0], return_on_investment.values[0]
Sharpe ratio is a great risk to reward measure. The formula goes as follows: $$ \text{Sharpe Ratio} = \frac{R_p - R_f}{\sigma_p} $$ The porfolio return Rp subtracted from the risk free rate Rf gives us the excess return. While the standard deviation gives us the risk.
# Let's test out the "simulation_engine" function and print out statistical metrics
# Define the initial investment amount
initial_investment = 1000000
portfolio_metrics = simulation_engine(weights, initial_investment)
print('Expected Portfolio Annual Return = {:.2f}%'.format(portfolio_metrics[0] * 100))
print('Portfolio Standard Deviation (Volatility) = {:.2f}%'.format(portfolio_metrics[1] * 100))
print('Sharpe Ratio = {:.2f}'.format(portfolio_metrics[2]))
print('Portfolio Final Value = ${:.2f}'.format(portfolio_metrics[3]))
print('Return on Investment = {:.2f}%'.format(portfolio_metrics[4]))
Expected Portfolio Annual Return = 16.96% Portfolio Standard Deviation (Volatility) = 20.08% Sharpe Ratio = 0.69 Portfolio Final Value = $2591251.11 Return on Investment = 159.13%
Monte Carlo Simulation¶
A Monte Carlo simulation is an efficient way to run multiple simulations on a single portfolio. For example, we can run a test of 10,000 simulations where Python can randomly generate weights and find different potential asset allocation weights. With this large dataset we are able to find the optimal case
# Set the number of simulation runs
sim_runs = 1000
initial_investment = 1000000
# Placeholder to store all weights
weights_runs = np.zeros((sim_runs, n))
# Placeholder to store all Sharpe ratios
sharpe_ratio_runs = np.zeros(sim_runs)
# Placeholder to store all expected returns
expected_portfolio_returns_runs = np.zeros(sim_runs)
# Placeholder to store all volatility values
volatility_runs = np.zeros(sim_runs)
# Placeholder to store all returns on investment
return_on_investment_runs = np.zeros(sim_runs)
# Placeholder to store all final portfolio values
final_value_runs = np.zeros(sim_runs)
for i in range(sim_runs):
# Generate random weights
weights = generate_portfolio_weights(n)
# Store the weights
weights_runs[i,:] = weights
# Call "simulation_engine" function and store Sharpe ratio, return and volatility
# Note that asset allocation is performed using the "asset_allocation" function
expected_portfolio_returns_runs[i], volatility_runs[i], sharpe_ratio_runs[i], final_value_runs[i], return_on_investment_runs[i] = simulation_engine(weights, initial_investment)
# Return the index of the maximum Sharpe ratio (Best simulation run)
sharpe_ratio_runs.argmax()
802
# Return the maximum Sharpe ratio value
sharpe_ratio_runs.max()
0.8069321105045569
#Obtain portfolio weights that correspond to maximum Sharpe Ratio
weights_runs[sharpe_ratio_runs.argmax(), :]
array([0.25925203, 0.0905036 , 0.09506045, 0.04489691, 0.07758603,
0.01590731, 0.1133639 , 0.06804111, 0.01813227, 0.2172564 ])
# Return Sharpe ratio, volatility corresponding to the best weights allocation (maximum Sharpe ratio)
optimal_portfolio_return, optimal_volatility, optimal_sharpe_ratio, highest_final_value, optimal_return_on_investment = simulation_engine(weights_runs[sharpe_ratio_runs.argmax(), :], initial_investment)
print('Best Portfolio Metrics Based on {} Monte Carlo Simulation Runs:'.format(sim_runs))
print(' - Portfolio Expected Annual Return = {:.02f}%'.format(optimal_portfolio_return * 100))
print(' - Portfolio Standard Deviation (Volatility) = {:.02f}%'.format(optimal_volatility * 100))
print(' - Sharpe Ratio = {:.02f}'.format(optimal_sharpe_ratio))
print(' - Final Value = ${:.02f}'.format(highest_final_value))
print(' - Return on Investment = {:.02f}%'.format(optimal_return_on_investment))
Best Portfolio Metrics Based on 1000 Monte Carlo Simulation Runs: - Portfolio Expected Annual Return = 19.45% - Portfolio Standard Deviation (Volatility) = 20.38% - Sharpe Ratio = 0.81 - Final Value = $2936480.02 - Return on Investment = 193.65%
# Create a DataFrame that contains volatility, return, and Sharpe ratio for all simualation runs
sim_out_df = pd.DataFrame({'Volatility': volatility_runs.tolist(), 'Portfolio_Return': expected_portfolio_returns_runs.tolist(), 'Sharpe_Ratio': sharpe_ratio_runs.tolist() })
sim_out_df.head
<bound method NDFrame.head of Volatility Portfolio_Return Sharpe_Ratio 0 0.186252 0.159995 0.697949 1 0.201230 0.169163 0.691564 2 0.190110 0.171342 0.743475 3 0.202554 0.179299 0.737084 4 0.195712 0.164027 0.684815 .. ... ... ... 995 0.190927 0.158218 0.671557 996 0.198990 0.171074 0.708951 997 0.199187 0.168732 0.696491 998 0.197026 0.162729 0.673664 999 0.195203 0.178939 0.762992 [1000 rows x 3 columns]>
# Plot volatility vs. return for all simulation runs
# Highlight the volatility and return that corresponds to the highest Sharpe ratio
import plotly.graph_objects as go
fig = px.scatter(sim_out_df, x = 'Volatility', y = 'Portfolio_Return', color = 'Sharpe_Ratio', size = 'Sharpe_Ratio', hover_data = ['Sharpe_Ratio'] )
fig.update_layout({'plot_bgcolor': "white"})
fig.show()
# Let's highlight the point with the highest Sharpe ratio
fig = px.scatter(sim_out_df, x = 'Volatility', y = 'Portfolio_Return', color = 'Sharpe_Ratio', size = 'Sharpe_Ratio', hover_data = ['Sharpe_Ratio'] )
fig.add_trace(go.Scatter(x = [optimal_volatility], y = [optimal_portfolio_return], mode = 'markers', name = 'Optimal Point', marker = dict(size=[40], color = 'red')))
fig.update_layout(coloraxis_colorbar = dict(y = 0.7, dtick = 5))
fig.update_layout({'plot_bgcolor': "white"})
fig.show()
# Plot interactive plot for volatility
fig = px.line(sim_out_df, y = 'Volatility')
fig.show()
# Plot interactive plot for Portfolio Return
fig = px.line(sim_out_df, y = 'Portfolio_Return')
fig.update_traces(line_color = 'red')
fig.show()
# Plot interactive plot for Portfolio Return
fig = px.line(sim_out_df, y = 'Sharpe_Ratio')
fig.update_traces(line_color = 'purple')
fig.show()